# dependencies
library(tidyverse)
library(effectsize)
library(pwr)
library(irr)
library(metafor)
library(forestploter)
library(janitor)
library(greekLetters)
library(knitr)
library(kableExtra)
dir.create("plots")
# function to round all numeric vars in a data frame
round_df <- function(df, n_digits = 3) {
df %>% mutate_if(is.numeric, janitor::round_half_up, digits = n_digits)
}
# apa format p value -----
apa_p_value <- function(p){
p_formatted <- ifelse(p >= 0.001, paste("=", round_half_up(p, 3)),
ifelse(p < 0.001, "< .001", NA))
p_formatted <- gsub(pattern = "0.", replacement = ".", x = p_formatted, fixed = TRUE)
p_formatted
}
# meta results string
meta_effect_string <- function(fit, predictions){
paste0("k = ", fit$k, ", r = ", predictions$pred,
", 95% CI [", predictions$ci.lb, ", ", predictions$ci.ub, "]",
", 95% CR [", predictions$cr.lb, ", ", predictions$cr.ub, "]",
", 95% PI [", predictions$pi.lb, ", ", predictions$pi.ub, "]",
", p = ", signif(2*pnorm(-abs(fit$zval)), digits = 1)) # exact p value from z score
}
# notation off
options(scipen = 999)
# plot theme
custom_theme <-
forest_theme(base_size = 10,
ci_lty = 1,
ci_lwd = 1.5,
ci_Theight = 0.2,
summary_fill = "black",
summary_col = "black",
footnote_cex = 0.8)
# get data
## Vahey et al.'s effect sizes
data_vahey <- read.csv("../data/data_vahey_et_al_2015.csv")
## get effect sizes reextracted from the original articles
data_reextracted <- read.csv("../data/data_reextracted_and_annotated.csv") %>%
# remove effects that merely repeat what vahey found, which are replaced by other rows that do new extractions for a corrispondonding effect.
filter(replaced_effect == FALSE) %>%
# mark each effect as clinically relevant only if both raters scored it as such
rowwise() %>%
mutate(clinically_relevant =
as.logical(max(c(clinically_relevant_criterion_rater_1,
clinically_relevant_criterion_rater_2)))) %>%
ungroup()
### remove non-criterion effects
data_reextracted_criterion <- data_reextracted %>%
filter(criterion_effect == TRUE)
### remove non-clinical criterion variables
data_reextracted_criterion_clinically_relevant <- data_reextracted_criterion %>%
filter(clinically_relevant == TRUE)
## sample sizes in published IRAP research, taken from a separate project
data_sample_sizes <- read_csv("../data/sample size data/data_irap.csv")
data_sample_sizes_vector <- data_sample_sizes |>
drop_na(n_participants_after_exclusions) |>
pull(n_participants_after_exclusions)Calculated from meta analytic effect sizes reported in forest plot and text.
Vahey et al.’s reported meta effect size estimate was r = .45, 95% CI [.40, .54], 95% CR [.23, .67]. Note that this is quite a large effect size, equivalent to Cohen’s d = 1.01.
Using this effect size, they conducted power analyses.
I used the R packages pwr and effectsize to attempt to reproduce these values.
data_power_verified <-
tibble(
test = c("Pearson’s r",
"Pearson’s r",
"Pearson’s r",
"Pearson’s r",
"Independent t-test (Cohen's d)",
"Independent t-test (Cohen's d)",
"Dependent t-test (Cohen's d)",
"Dependent t-test (Cohen's d)"),
tails = c("One-tailed",
"One-tailed",
"Two-tailed",
"Two-tailed",
"One-tailed",
"One-tailed",
"One-tailed",
"One-tailed"),
estimate = c("Point estimate",
"Lower bound of 95% Confidence Interval",
"Point estimate",
"Lower bound of 95% Confidence Interval",
"Point estimate",
"Lower bound of 95% Confidence Interval",
"Point estimate",
"Lower bound of 95% Confidence Interval"),
effect_size_vahey = c(0.45, 0.4, 0.45, 0.4, 1.01, 0.87, 1.01, 0.87),
required_n_vahey = c(29L, 37L, 36L, NA, 26L, 36L, 8L, 10L)
)
point_estimate_vahey <- .45
lower_ci_vahey <- .40
data_power_verified$required_n_verified[1] <-
pwr.r.test(n = NULL,
r = point_estimate_vahey,
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()
data_power_verified$required_n_verified[2] <-
pwr.r.test(n = NULL,
r = lower_ci_vahey,
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()
data_power_verified$required_n_verified[3] <-
pwr.r.test(n = NULL,
r = point_estimate_vahey,
sig.level = 0.05,
power = 0.8,
alternative = "two.sided")$n %>%
ceiling()
data_power_verified$required_n_verified[4] <-
pwr.r.test(n = NULL,
r = lower_ci_vahey,
sig.level = 0.05,
power = 0.8,
alternative = "two.sided")$n %>%
ceiling()
data_power_verified$required_n_verified[5] <-
pwr.t.test(n = NULL,
d = r_to_d(point_estimate_vahey),
type = "two.sample",
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()*2
data_power_verified$required_n_verified[6] <-
pwr.t.test(n = NULL,
d = r_to_d(lower_ci_vahey),
type = "two.sample",
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()*2
data_power_verified$required_n_verified[7] <-
pwr.t.test(n = NULL,
d = r_to_d(point_estimate_vahey),
type = "paired",
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()
data_power_verified$required_n_verified[8] <-
pwr.t.test(n = NULL,
d = r_to_d(lower_ci_vahey),
type = "paired",
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()
data_power_verified %>%
kable() %>%
kable_classic(full_width = FALSE)| test | tails | estimate | effect_size_vahey | required_n_vahey | required_n_verified |
|---|---|---|---|---|---|
| Pearson’s r | One-tailed | Point estimate | 0.45 | 29 | 29 |
| Pearson’s r | One-tailed | Lower bound of 95% Confidence Interval | 0.40 | 37 | 37 |
| Pearson’s r | Two-tailed | Point estimate | 0.45 | 36 | 36 |
| Pearson’s r | Two-tailed | Lower bound of 95% Confidence Interval | 0.40 | NA | 46 |
| Independent t-test (Cohen’s d) | One-tailed | Point estimate | 1.01 | 26 | 26 |
| Independent t-test (Cohen’s d) | One-tailed | Lower bound of 95% Confidence Interval | 0.87 | 36 | 34 |
| Dependent t-test (Cohen’s d) | One-tailed | Point estimate | 1.01 | 8 | 8 |
| Dependent t-test (Cohen’s d) | One-tailed | Lower bound of 95% Confidence Interval | 0.87 | 10 | 10 |
Calculated from weighted average effect sizes reported in forest plot.
The power analyses relied on the accuracy of the meta-analytic effect size. So, I then attempted to computationally reproduce the meta-analytic effect size from the effect sizes, confidence intervals, credibility intervals, and sample sizes reported in Vahey et al.’s forest plot and their description of their meta-analytic method in their manuscript.
In an email, Vahey stated that they performed a Hunter & Schmidt meta analysis, following the guide of Field & Gillett (2010) and using their code.
On inspection, Field & Gillett (2010) make a distinction between the Hunter & Schmidt method, which is distinctive for reporting credibility intervals rather than confidence intervals, and the Hedges and colleagues’ method, which is distinctive for using Fisher’s r-to-z transformations.
Vahey et al. (2015) do not report applying any transformations to their data. However, Vahey et al.’s (2015) individual effect sizes in their forest plot have asymmetric confidence intervals. This implies that a transformation was performed. It is therefore possible that Vahey et al. combined the two methods (and code) provided by Field & Gillett (2010). I first fit a Hunter & Schmidt method, then a combined Hunter & Schmidt and Hedges’ and colleagues method.
Information about implementation of Hunter & Schmidt-style meta-analysis in metafor from here.
total_n <- data_vahey %>%
distinct(article, n_forest_plot) %>%
drop_na() %>%
summarize(total_n = sum(n_forest_plot, na.rm = TRUE)) %>%
pull(total_n)
# calculate variances
data_recalculated_variance <-
escalc(measure = "COR",
ri = mean_r_forest_plot_numeric,
ni = n_forest_plot,
data = data_vahey,
vtype = "AV",
digits = 10) %>%
drop_na() %>%
select(article, article_abbreviated,
yi, vi, ni = n_forest_plot) %>%
mutate(lower = yi - sqrt(vi)*1.96,
upper = yi + sqrt(vi)*1.96)
# fit model
fit_reproduced <-
rma(yi = yi,
vi = vi,
weights = ni, # Hunter Schmidt method requires weighting by N
method = "HS", # Hunter Schmidt method
data = data_recalculated_variance,
slab = article_abbreviated,
digits = 10)
predictions_reproduced <-
predict(fit_reproduced) %>%
as.data.frame() %>%
select(-se) %>%
# cr following Field & Gillett (2010) equation 5, where s_r^2 == tau2 as they both estimate the variance in population correlations. e.g., see Field's script "h_s syntax.sps" which refers to this as both tau and s_r^2 depending on the meta analytic approach
mutate(cr.lb = pred - 1.96*sqrt(fit_reproduced$tau2),
cr.ub = pred + 1.96*sqrt(fit_reproduced$tau2))
predictions_reproduced_printing <- predictions_reproduced %>%
round_df(2)
# predictions_reproduced %>%
# round_df(2) %>%
# kable() %>%
# kable_classic(full_width = FALSE)
# plot
data_forest <- data_recalculated_variance %>%
drop_na() %>%
select(article_abbreviated,
n = ni,
r = yi,
lower,
upper) %>%
# bind_rows(tibble(article_abbreviated = "Meta",
# n = 35, # arbitrary number to make the diamond an appropriate size
# r = predictions_reproduced$pred,
# lower = predictions_reproduced$pi.lb,
# upper = predictions_reproduced$pi.ub)) %>%
bind_rows(tibble(article_abbreviated = c("Meta (confidence interval)",
"Meta (credibility interval)"),
n = 35, # arbitrary number to make the diamond an appropriate size
r = c(predictions_reproduced$pred,
predictions_reproduced$pred),
lower = c(predictions_reproduced$ci.lb,
predictions_reproduced$cr.lb),
upper = c(predictions_reproduced$ci.ub,
predictions_reproduced$cr.ub))) %>%
mutate(size = n/sum(n),
n = ifelse(str_detect(article_abbreviated, "Meta"), total_n, n),
article_abbreviated = fct_relevel(article_abbreviated,
"Meta (confidence interval)",
"Meta (credibility interval)")) %>%
mutate(` ` = paste(rep(" ", 30), collapse = " ")) %>%
select(Article = article_abbreviated, ` `, r, lower, upper, n, size) %>%
round_df(2)
p_reproduced <-
forestploter::forest(data = select(data_forest, -size),
est = data_forest$r,
lower = data_forest$lower,
upper = data_forest$upper,
sizes = 1/data_forest$size,
#is_summary = c(rep(FALSE, nrow(data_forest)-1), TRUE),
is_summary = c(rep(FALSE, nrow(data_forest)-2), TRUE, TRUE),
ci_column = 2,
ref_line = 0,
theme = custom_theme,
xlim = c(-0.3, 1.1),
ticks_at = c(-.2, 0, .2, .4, .6, .8, 1.0))
p_reproducedggplot2::ggsave(filename = "plots/forest plot Hunter & Schmidt method.pdf",
plot = p_reproduced,
device = "pdf",
width = 7.5,
height = 4.5,
units = "in")Meta effect: k = 15, r = 0.47, 95% CI [0.4, 0.54], 95% CR [0.47, 0.47], 95% PI [0.4, 0.54], p = 0.000000000000000000000000000000000000004.
Fisher’s r-to-z transformation and back transformation, with HS estimator. No Overton (1998) transformation.
# calculate variances
data_recalculated_variance_transformed <- data_vahey %>%
# # Overton transformation. Unnumbered equation between equations 8 and 9 in Field & Gillett (2010)
# mutate(mean_r_forest_plot_numeric = mean_r_forest_plot_numeric - (mean_r_forest_plot_numeric*(1-mean_r_forest_plot_numeric^2))/(2*(n_forest_plot-3))) %>%
escalc(measure = "ZCOR",
ri = mean_r_forest_plot_numeric,
ni = n_forest_plot,
data = .,
vtype = "AV",
digits = 10) %>%
drop_na() %>%
select(article, article_abbreviated,
yi, vi, ni = n_forest_plot) %>%
mutate(lower = yi - sqrt(vi)*1.96,
upper = yi + sqrt(vi)*1.96)
# fit model
fit_reproduced_transformed <-
rma(yi = yi,
vi = vi,
weights = ni, # Hunter Schmidt method requires weighting by N
method = "HS", # Hunter Schmidt method
data = data_recalculated_variance_transformed,
slab = article_abbreviated,
digits = 10)
predictions_reproduced_transformed <-
predict(fit_reproduced_transformed) %>%
as.data.frame() %>%
select(-se) %>%
# cr following Field & Gillett (2010) equation 5, where s_r^2 == tau2 as they both estimate the variance in population correlations. e.g., see Field's script "h_s syntax.sps" which refers to this as both tau and s_r^2 depending on the meta analytic approach
mutate(cr.lb = pred - 1.96*sqrt(fit_reproduced$tau2),
cr.ub = pred + 1.96*sqrt(fit_reproduced$tau2))
predictions_reproduced_transformed_backtransformed <-
predictions_reproduced_transformed %>%
mutate_all(transf.ztor) %>%
round_df(2)
# plot
data_forest_transformed <- data_recalculated_variance_transformed %>%
select(article_abbreviated,
n = ni,
r = yi,
lower,
upper) %>%
# bind_rows(tibble(article_abbreviated = "Meta",
# n = 35, # arbitrary number to make the diamond an appropriate size
# r = predictions_reproduced_transformed$pred,
# lower = predictions_reproduced_transformed$pi.lb,
# upper = predictions_reproduced_transformed$pi.ub)) %>%
bind_rows(tibble(article_abbreviated = c("Meta (confidence interval)",
"Meta (credibility interval)"),
n = 35, # arbitrary number to make the diamond an appropriate size
r = c(predictions_reproduced_transformed$pred,
predictions_reproduced_transformed$pred),
lower = c(predictions_reproduced_transformed$ci.lb,
predictions_reproduced_transformed$cr.lb),
upper = c(predictions_reproduced_transformed$ci.ub,
predictions_reproduced_transformed$cr.ub))) %>%
mutate(r = transf.ztor(r),
lower = transf.ztor(lower),
upper = transf.ztor(upper)) %>%
mutate(size = n/sum(n),
n = ifelse(str_detect(article_abbreviated, "Meta"), total_n, n),
article_abbreviated = fct_relevel(article_abbreviated,
"Meta (confidence interval)",
"Meta (credibility interval)")) %>%
mutate(` ` = paste(rep(" ", 30), collapse = " ")) %>%
select(Article = article_abbreviated, ` `, r, lower, upper, n, size) %>%
round_df(2)
p_reproduced_transformed <-
forestploter::forest(data = select(data_forest_transformed, -size),
est = data_forest_transformed$r,
lower = data_forest_transformed$lower,
upper = data_forest_transformed$upper,
sizes = 1/data_forest_transformed$size,
#is_summary = c(rep(FALSE, nrow(data_forest)-1), TRUE),
is_summary = c(rep(FALSE, nrow(data_forest)-2), TRUE, TRUE),
ci_column = 2,
ref_line = 0,
theme = custom_theme,
xlim = c(-0.3, 1.1),
ticks_at = c(-.2, 0, .2, .4, .6, .8, 1.0))
p_reproduced_transformedggplot2::ggsave(filename = "plots/forest plot Hunter & Schmidt method with Fisher's r-to-z transformations.pdf",
plot = p_reproduced_transformed,
device = "pdf",
width = 7.5,
height = 4.5,
units = "in")Meta effect: k = 15, r = 0.47, 95% CI [0.4, 0.54], 95% CR [0.47, 0.47], 95% PI [0.4, 0.54], p = 0.000000000000000000000000002.
Reported in forest plot, calculated from individual effect sizes in supplementary materials.
Next, I attempted to reproduce the weighted-mean effect sizes reported in Vahey et al.’s forest plot from the individual effect sizes they reported in their supplementary materials.
Vahey et al. reported that they weighted by degrees of freedom, and I do the same.
I noted that some of the confidence intervals in Vahey et al.’s forest plot are asymmetrical around the point estimate. This is uncommon and not accounted for by Vahey et al. detailing of how they calculated the effect sizes and their confidence intervals. However, I take them at face value as they are the most detailed data available to work from.
# recalculate and compare
data_weighted_effect_sizes <- data_vahey %>%
select(article = article_abbreviated,
r_supplementary,
df_supplementary) %>%
group_by(article) %>%
summarize(r_recalculated_weighted_mean = round_half_up(weighted.mean(x = r_supplementary, w = df_supplementary), 2)) %>%
ungroup() %>%
left_join(data_vahey %>%
select(article = article_abbreviated,
mean_r_forest_plot_numeric) %>%
drop_na(),
by = "article") %>%
mutate(congruence = ifelse(janitor::round_half_up(r_recalculated_weighted_mean, 2) == janitor::round_half_up(mean_r_forest_plot_numeric, 2), "Congruent", "Incongruent"),
congruence = fct_relevel(congruence, "Congruent", "Incongruent"),
difference = r_recalculated_weighted_mean - mean_r_forest_plot_numeric)
# plot
p_comparison_reaveraged <-
ggplot(data_weighted_effect_sizes, aes(mean_r_forest_plot_numeric, r_recalculated_weighted_mean)) +
geom_abline(slope = 1, linetype = "dotted") +
geom_point(aes(color = congruence, shape = congruence), size = 2.25) +
theme_classic() +
scale_color_viridis_d(begin = 0.25, end = 0.75) +
theme(legend.position = "top", # c(0.25, 0.85),
legend.title = element_blank()) +
#legend.box.background = element_rect(colour = "black")) +
xlim(0.35, 0.75) +
ylim(0.35, 0.75) +
xlab("Reported in Vahey et al.'s (2015)\nforest plot") +
ylab("Recalculated from Vahey et al.'s (2015)\nsupplementary materials") +
annotate("text", x = .66, y = .45, size = 3, label = "Overestimates validity") +
annotate("text", x = .45, y = .66, size = 3, label = "Underestimates validity")
p_comparison_reaveragedggplot2::ggsave(filename = "plots/scatter plot comparing effect sizes in forest plot to reaveraged from supplementary materials.pdf",
plot = p_comparison_reaveraged,
device = "pdf",
width = 4.5,
height = 4.5,
units = "in")
# table
data_weighted_effect_sizes %>%
summarize(n_incongruent = sum(ifelse(congruence == "Incongruent", TRUE, FALSE)),
percent_incongruent = round_half_up(mean(ifelse(congruence == "Incongruent", TRUE, FALSE)), 2)*100) %>%
kable() %>%
kable_classic(full_width = FALSE)| n_incongruent | percent_incongruent |
|---|---|
| 2 | 13 |
data_weighted_effect_sizes %>%
filter(difference != 0) %>%
select(article, r_recalculated_weighted_mean, mean_r_forest_plot_numeric, difference, congruence) %>%
round_df(2) %>%
kable() %>%
kable_classic(full_width = FALSE)| article | r_recalculated_weighted_mean | mean_r_forest_plot_numeric | difference | congruence |
|---|---|---|---|---|
| Nicholson, Dempsey et al. (2013) | 0.46 | 0.48 | -0.02 | Incongruent |
| Vahey et al. (2009) | 0.58 | 0.53 | 0.05 | Incongruent |
The weighted effect sizes reported in Vahey et al.’s forest plot could not be computationally reproduced from the individual effect sizes and degrees of freedom reported in their supplementary materials.
Many values were congruent, but a minority differed (k = 2 [13%]). Table includes those that differed.
Laken’s (2016) describes incorrect inclusions as inclusion of effect sizes that do not meet the inclusion criteria. Vahey et al. (2015) stated that the purpose of their meta-analysis was to “quantify how much IRAP effects from clinically-relevant responding co-vary with corresponding clinically-relevant criterion variables” (p.60). Their inclusion criterion was that “the IRAP and criterion variables must have been deemed to target some aspect of a condition included in a major psychiatric diagnostic scheme such as the Diagnostic and Statistical Manual of Mental Disorders (DSM-5, 2013) … The authors decided whether the responses measured by a given IRAP trial-type should co-vary with a specific criterion variable by consulting the relevant empirical literature.” (p.60). References to neither the specific clinical condition that was targeted by the IRAP and the criterion variable nor the specific empirical literature that Vahey et al. (2015) used to justify the inclusion of each criterion were provided in the original article or supplementary materials. Nonetheless, Vahey’s own inclusion criterion required that effects referred to covariation between an IRAP and an external clinically-relevant criterion variable, consistent with the APA dictionary of psychology definition of criterion validity (REF).
data_vahey %>%
count(involves_external_criterion) %>%
kable() %>%
kable_classic(full_width = FALSE)| involves_external_criterion | n |
|---|---|
| FALSE | 23 |
| TRUE | 33 |
“Collectively, these 15 articles yielded 56 statistical effects between various clinically focused IRAP effects and their respective criterion variables. … of the 56 statistical effects only 8 (i.e. 14%) were not initially cited by both authors for inclusion.” (Vahey et al., 2015, p. 60)
Vahey et al. did not report how many effect sizes they excluded.
I reviewed these same 15 articles and followed Vahey et al.’s (2015) same inclusion criteria (i.e., like Vahey, I did not limit myself to effect sizes reported in the articles but also considered ones implied within the articles. Like Vahey et al., where necessary I contacted the original authors to obtain additional information about effect sizes).
I extracted 308 effect sizes. Of these, 255 met the inclusion criterion of being a criterion effect. Of these, 171 met the inclusion criterion of being clincially relevant by Vahey et al.’s definition. This was 3.1 times the number extracted by Vahey et al.
data_raters <- data_reextracted_criterion %>%
select(clinically_relevant_criterion_rater_1, clinically_relevant_criterion_rater_2)
data_raters %>%
mutate(agreement = ifelse(clinically_relevant_criterion_rater_1 == clinically_relevant_criterion_rater_2, TRUE, FALSE)) %>%
summarize(interrater_percent_agreement = round_half_up(mean(agreement, na.rm = TRUE), 1)) %>%
kable() %>%
kable_classic(full_width = FALSE)| interrater_percent_agreement |
|---|
| 0.9 |
kappa2(data_raters, "unweighted")## Cohen's Kappa for 2 Raters (Weights: unweighted)
##
## Subjects = 255
## Raters = 2
## Kappa = 0.872
##
## z = 14
## p-value = 0
Excluding non-clinically relevant and effects that do not involve a relationship with an external variable, which cannot provide evidence of criterion validity.
Details of the meta-analytic strategy, which used contemporary methods:
total_n_new <- data_reextracted_criterion_clinically_relevant %>%
distinct(article, n_from_paper) %>%
group_by(article) %>%
filter(n_from_paper == max(n_from_paper)) %>%
ungroup() %>%
summarize(total_n = sum(n_from_paper, na.rm = TRUE)) %>%
pull(total_n)
# exclusions and transformations
data_for_meta_new <- data_reextracted_criterion_clinically_relevant %>%
mutate(variables = paste(irap_variable, criterion_variable, sep = "-")) %>%
dplyr::select(article, variables, r_from_paper, n_from_paper, authors_include_bh) %>%
na.omit() %>%
escalc(measure = "ZCOR",
ri = r_from_paper,
ni = n_from_paper,
data = .,
slab = paste(article, variables),
digits = 10) %>%
rename(ni = n_from_paper) %>%
dplyr::select(article, variables, yi, vi, ni, authors_include_bh) %>%
na.omit() %>%
group_by(article) %>%
mutate(es_number = row_number(),
article_abbreviated = paste(article, es_number)) %>%
ungroup() %>%
mutate(lower = yi - sqrt(vi)*1.96,
upper = yi + sqrt(vi)*1.96)
# fit
fit_new <-
rma.mv(yi = yi,
V = vi,
random = ~ 1 | article,
method = "REML",
data = data_for_meta_new,
slab = article_abbreviated,
digits = 10)
predictions_new <-
predict(fit_new) %>%
as.data.frame() %>%
select(-se) %>%
# cr following Field & Gillett (2010) equation 5
mutate(cr.lb = pred - 1.96*sqrt(fit_new$tau2),
cr.ub = pred + 1.96*sqrt(fit_new$tau2))
predictions_new_backtransformed <- predictions_new %>%
mutate_all(transf.ztor) %>%
round_df(2)
# plot
data_forest_new <- data_for_meta_new %>%
drop_na() %>%
select(article_abbreviated,
n = ni,
r = yi,
lower,
upper) %>%
# bind_rows(tibble(article_abbreviated = "Meta",
# n = 35, # arbitrary number to make the diamond an appropriate size
# r = predictions_new$pred,
# lower = predictions_new$ci.lb,
# upper = predictions_new$ci.ub)) %>%
bind_rows(tibble(article_abbreviated = c("Meta-analysis (3-level RE confidence interval)",
"Meta-analysis (3-level RE credibility interval)",
"Meta-analysis (3-level RE prediction interval)"),
n = 35, # arbitrary number to make the diamond an appropriate size
r = c(predictions_new$pred,
predictions_new$pred,
predictions_new$pred),
lower = c(predictions_new$ci.lb,
predictions_new$cr.lb,
predictions_new$pi.lb),
upper = c(predictions_new$ci.ub,
predictions_new$cr.ub,
predictions_new$pi.ub))) %>%
mutate(r = transf.ztor(r),
lower = transf.ztor(lower),
upper = transf.ztor(upper)) %>%
mutate(size = n/sum(n),
n = ifelse(str_detect(article_abbreviated, "Meta"), total_n_new, n),
article_abbreviated = fct_relevel(article_abbreviated,
"Meta-analysis (3-level RE confidence interval)",
"Meta-analysis (3-level RE credibility interval)",
"Meta-analysis (3-level RE prediction interval)")) %>%
mutate(` ` = paste(rep(" ", 35), collapse = " ")) %>%
select(Article = article_abbreviated, ` `, r, lower, upper, n, size) %>%
mutate(r = round_half_up(r, 2),
lower = round_half_up(lower, 2),
upper = round_half_up(upper, 2))
p_new <-
forestploter::forest(data = select(data_forest_new, -size),
est = data_forest_new$r,
lower = data_forest_new$lower,
upper = data_forest_new$upper,
sizes = 1/data_forest_new$size,
# is_summary = c(rep(FALSE, nrow(data_forest_new)-1), TRUE),
is_summary = c(rep(FALSE, nrow(data_forest_new)-3),
rep(TRUE, 3)),
ci_column = 2,
ref_line = 0,
theme = custom_theme,
xlim = c(-0.7, 1.1),
ticks_at = c(-.6, -.4, -.2, 0, .2, .4, .6, .8, 1.0))
p_newggplot2::ggsave(filename = "plots/forest plot new multilevel model.pdf",
plot = p_new,
device = "pdf",
width = 8,
height = 39,
units = "in")Meta effect: k = 171, r = 0.22, 95% CI [0.15, 0.29], 95% CR [0.22, 0.22], 95% PI [-0.01, 0.42], p = 0.000000004.
data_power_new <- data_power_verified %>%
mutate(effect_size_new = c(predictions_new_backtransformed$pred,
predictions_new_backtransformed$ci.lb,
predictions_new_backtransformed$pred,
predictions_new_backtransformed$ci.lb,
r_to_d(predictions_new_backtransformed$pred),
r_to_d(predictions_new_backtransformed$ci.lb),
r_to_d(predictions_new_backtransformed$pred),
r_to_d(predictions_new_backtransformed$ci.lb)),
effect_size_new = round_half_up(effect_size_new, 2))
point_estimate_new <- predictions_new_backtransformed$pred
lower_ci_new <- predictions_new_backtransformed$ci.lb
data_power_new$required_n_new[1] <-
pwr.r.test(n = NULL,
r = point_estimate_new,
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()
data_power_new$required_n_new[2] <-
pwr.r.test(n = NULL,
r = lower_ci_new,
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()
data_power_new$required_n_new[3] <-
pwr.r.test(n = NULL,
r = point_estimate_new,
sig.level = 0.05,
power = 0.8,
alternative = "two.sided")$n %>%
ceiling()
data_power_new$required_n_new[4] <-
pwr.r.test(n = NULL,
r = lower_ci_new,
sig.level = 0.05,
power = 0.8,
alternative = "two.sided")$n %>%
ceiling()
data_power_new$required_n_new[5] <-
pwr.t.test(n = NULL,
d = r_to_d(point_estimate_new),
type = "two.sample",
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()*2
data_power_new$required_n_new[6] <-
pwr.t.test(n = NULL,
d = r_to_d(lower_ci_new),
type = "two.sample",
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()*2
data_power_new$required_n_new[7] <-
pwr.t.test(n = NULL,
d = r_to_d(point_estimate_new),
type = "paired",
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()
data_power_new$required_n_new[8] <-
pwr.t.test(n = NULL,
d = r_to_d(lower_ci_new),
type = "paired",
sig.level = 0.05,
power = 0.8,
alternative = "greater")$n %>%
ceiling()
# print
data_power_new %>%
kable() %>%
kable_classic(full_width = FALSE)| test | tails | estimate | effect_size_vahey | required_n_vahey | required_n_verified | effect_size_new | required_n_new |
|---|---|---|---|---|---|---|---|
| Pearson’s r | One-tailed | Point estimate | 0.45 | 29 | 29 | 0.22 | 126 |
| Pearson’s r | One-tailed | Lower bound of 95% Confidence Interval | 0.40 | 37 | 37 | 0.15 | 273 |
| Pearson’s r | Two-tailed | Point estimate | 0.45 | 36 | 36 | 0.22 | 160 |
| Pearson’s r | Two-tailed | Lower bound of 95% Confidence Interval | 0.40 | NA | 46 | 0.15 | 346 |
| Independent t-test (Cohen’s d) | One-tailed | Point estimate | 1.01 | 26 | 26 | 0.45 | 124 |
| Independent t-test (Cohen’s d) | One-tailed | Lower bound of 95% Confidence Interval | 0.87 | 36 | 34 | 0.30 | 270 |
| Dependent t-test (Cohen’s d) | One-tailed | Point estimate | 1.01 | 8 | 8 | 0.45 | 32 |
| Dependent t-test (Cohen’s d) | One-tailed | Lower bound of 95% Confidence Interval | 0.87 | 10 | 10 | 0.30 | 69 |
# compare power analysis sample sizes to the published literature
data_power_new %>%
rowwise() %>%
mutate(percent_of_publications_meeting_required_n_verified = janitor::round_half_up(mean(data_sample_sizes_vector > required_n_verified)*100, 1),
percent_of_publications_meeting_required_n_new = janitor::round_half_up(mean(data_sample_sizes_vector > required_n_new)*100, 1)) %>%
ungroup() %>%
select(test, tails,
required_n_verified,
percent_of_publications_meeting_required_n_verified,
required_n_new,
percent_of_publications_meeting_required_n_new) %>%
kable() %>%
kable_classic(full_width = FALSE)| test | tails | required_n_verified | percent_of_publications_meeting_required_n_verified | required_n_new | percent_of_publications_meeting_required_n_new |
|---|---|---|---|---|---|
| Pearson’s r | One-tailed | 29 | 75.5 | 126 | 2.1 |
| Pearson’s r | One-tailed | 37 | 54.3 | 273 | 0.0 |
| Pearson’s r | Two-tailed | 36 | 55.9 | 160 | 1.1 |
| Pearson’s r | Two-tailed | 46 | 39.4 | 346 | 0.0 |
| Independent t-test (Cohen’s d) | One-tailed | 26 | 77.7 | 124 | 2.1 |
| Independent t-test (Cohen’s d) | One-tailed | 34 | 59.0 | 270 | 0.0 |
| Dependent t-test (Cohen’s d) | One-tailed | 8 | 99.5 | 32 | 62.2 |
| Dependent t-test (Cohen’s d) | One-tailed | 10 | 98.9 | 69 | 11.7 |
data_sample_sizes |>
distinct(Title, study_design_ignoring_trial_type_comparisons) |>
count(study_design_ignoring_trial_type_comparisons) |>
mutate(percent = janitor::round_half_up(n/sum(n)*100, 1)) |>
kable() |>
kable_classic(full_width = FALSE)| study_design_ignoring_trial_type_comparisons | n | percent |
|---|---|---|
| between | 74 | 48.1 |
| mixed | 32 | 20.8 |
| within | 48 | 31.2 |